library(tidyverse)
library(Biostrings)
library(DECIPHER)
library(ggseqlogo)
library(htmltools)
library(plotly)
# Load the PETase protein sequences from the sequence dump.
# readAAStringSet() already returns an AAStringSet, so the result is used
# directly without re-wrapping it in AAStringSet().
# NOTE(review): readAAStringSet() parses FASTA by default; presumably
# "seqdump.txt" is a FASTA export -- confirm.
petases <- readAAStringSet("seqdump.txt")

# Align the sequences with DECIPHER; progress is printed to the console.
aligned_petases <- AlignSeqs(petases)
## Determining distance matrix based on shared 5-mers:
## ================================================================================
##
## Time difference of 0 secs
##
## Clustering into groups by similarity:
## ================================================================================
##
## Time difference of 0.06 secs
##
## Aligning Sequences:
## ================================================================================
##
## Time difference of 0.12 secs
##
## Iteration 1 of 2:
##
## Determining distance matrix based on alignment:
## ================================================================================
##
## Time difference of 0 secs
##
## Reclustering into groups by similarity:
## ================================================================================
##
## Time difference of 0 secs
##
## Realigning Sequences:
## ================================================================================
##
## Time difference of 0.11 secs
##
## Iteration 2 of 2:
##
## Determining distance matrix based on alignment:
## ================================================================================
##
## Time difference of 0 secs
##
## Reclustering into groups by similarity:
## ================================================================================
##
## Time difference of 0 secs
##
## Realigning Sequences:
## ================================================================================
##
## Time difference of 0.01 secs
# Write the aligned sequences to "output.html" without launching a browser
# (openURL = FALSE), then embed that HTML file in the rendered document.
BrowseSeqs(
  myXStringSet = aligned_petases,
  htmlFile = "output.html",
  openURL = FALSE
)
htmltools::includeHTML(path = "output.html")
20 40 60 80 100 120 140 160 180 200 220 240 260 280 300
'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|'''''''''|''
6ILX_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] --------------------------MQTNPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGFSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCS--HHHHHH-------------- 270
5XFY_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] -----------------------------NPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWAMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCS---------------------- 261
5XJH_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCSLEDPAANKARKEAELAAATAEQ 300
5XFZ_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] ----------------------------MNPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALGQVASLNGTSSSPIYGKVDTARMGVMGWAMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCS---------------------- 262
5XH3_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] -----------------------------NPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALGQVASLNGTSSSPIYGKVDTARMGVMGWAMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCS---------------------- 261
6KUO_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSDQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCSLEDPAANKARKEAELAAATAEQ 300
5YNS_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] -----------------------------GSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTAVSDFRTANCSLED------------------- 264
6IJ5_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAAQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCSLEDPAANKARKEARLAAATAEQ 300
5YFE_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] --------------------------AQTNPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPNNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSANAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTNNCSLEHHHHHH-------------- 272
6IJ4_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPESRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWHSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCSLEDPAANKARKEARLAAATAEQ 300
6KUQ_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPSSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWDSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQDLIGKKGVAWMKRFMDNDTRYSTFACENPNSTKVSDFRTANCSLEDPAANKARKEAELAAATAEQ 300
6IJ3_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPDSRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWHSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCSLEDPAANKARKEARLAAATAEQ 300
6IJ6_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPESRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWHSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTAVSDFRTANCSLEDPAANKARKEARLAAATAEQ 300
6KUS_A Chain A, Poly(ethylene terephthalate) hydrolase [Ideonella sakaiensis] ------------MGSSHHHHHHSSGLVPRGSHMRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTLDQPESRSSQQMAALRQVASLNGTSSSPIYGKVDTARMGVMGWSMGGGGSLISAANNPSLKAAAPQAPWHSSTNFSSVTVPTLIFACENDSIAPVNSSALPIYDSMSRNAKQFLEINGGSHSCANTGNSDQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTRVSDFRTANCSLEDPAANKARKEAELAAATAEQ 300
6KY5_A Chain A, PET hydrolase [Ideonella sakaiensis] MNFPRASRLMQAAVLGGLMAVSAAATAQTNPYARGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPTNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTFDYPSSRSSQQMAALRQVASLNGDSSSPIYGKVDTARMGVMGHSMGGGASLRSAANNPSLKAAIPQAPWDSQTNFSSVTVPTLIFACENDSIAPVNSHALPIYDSMSRNAKQFLEINGGSHSCANSGNSNQALIGKKGVAWMKRFMDNDTRYSTFACENPNSTAVSDFRTANCSLEHHHHHH-------------- 298
Consensus MNFPRASRLMQAXXXXXXXXXXXXXXXXXXXXXRGPNPTAASLEASAGPFTVRSFTVSRPSGYGAGTVYYPXNAGGTVGAIAIVPGYTARQSSIKWWGPRLASHGFVVITIDTNSTXDXPXSRSSQQMAALXQVASLNGXSSSPIYGKVDTARMGVMGXXMGGGXSLXSAANNPSLKAAXXQAPWXSXTNFSSVTVPTLIFACENDSIAPVNSXALPIYDSMSXNAKQFLEINGGSHSCANXGNSBQXLIGKKGVAWMKRFMDNDTRYSTFACENPNSTXVSDFRTXNCS++XXXXXXARKEAXLAAATAEQ 312
# Build a sequence logo from the aligned sequences (converted to plain
# character strings) and turn it into an interactive plotly widget.
# The widget is made 5000 px wide; presumably so that each alignment column
# stays legible -- adjust if the alignment length changes.
aligned_petases %>%
  as.character() %>%
  ggseqlogo() %>%
  ggplotly(width = 5000)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.